####GSE87466--------
# Placeholder path: set this to the directory holding the GSE87466 inputs.
setwd("")

# Gene list (only column 1 is used) and platform annotation table
# (column 1 is matched against the expression table, column 2 against the
# gene list).
gene <- as.matrix(read.table("gene.txt", sep = "\t"))
probe_gene <- as.matrix(read.table("GPL13158-5065.txt", sep = "\t"))

# Expression table; its first column is compared with annotation column 1.
exp <- as.matrix(read.table("GSE87466_series_matrix.txt", sep = "\t"))

# Keep annotation rows present in the expression table, then those whose
# column 2 is in the gene list. drop = FALSE keeps a matrix even when zero or
# one row survives the filter.
probe_gene <- probe_gene[probe_gene[, 1] %in% exp[, 1], , drop = FALSE]
probe <- probe_gene[probe_gene[, 2] %in% gene[, 1], , drop = FALSE]

# Fetch the expression row of every retained probe in one vectorised lookup
# (first match wins if an ID is repeated in the expression table). unname()
# keeps the appended columns unnamed in the output.
row_idx <- match(probe[, 1], exp[, 1])
probe_exp <- unname(exp[row_idx, , drop = FALSE])

result <- cbind(probe, probe_exp)
write.table(result, "GSE87466_exp.txt",
            row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)

library(ggplot2)
library(ggbiplot)
# PCA of the GSE87466 table read from "GSE87466_exp_mean.txt"
# (rows = features, columns = samples).
exp_2 <- read.table("GSE87466_exp_mean.txt", sep = "\t",
                    header = TRUE, row.names = 1)
# Group labels are assigned by column position: the first 87 columns are
# labelled "UC" and the next 21 "Normal".
# NOTE(review): this assumes the file's columns are already in that order.
group <- data.frame(sample = colnames(exp_2),
                    group = c(rep("UC", 87), rep("Normal", 21)))
# Row totals are computed without `%>%`: dplyr is only attached further down,
# and presumably neither ggplot2 nor ggbiplot provides the pipe.
rowwilcox <- as.matrix(rowSums(exp_2))
exp_2 <- subset(exp_2, rowwilcox != 0)  # drop rows whose total is zero
exp_2 <- t(exp_2)                        # samples in rows for prcomp()

pca_result <- prcomp(exp_2, scale. = TRUE)

ggbiplot(pca_result,
         var.axes = FALSE,
         obs.scale = 1,
         groups = group[, 2],
         ellipse = TRUE,
         circle = FALSE)
# geom_text(
#   aes(label=rownames(exp_2)),
#   vjust=1.5,
#   size=2
# )


#DEG
library(limma)
library(dplyr)
library(DESeq2)
library("amap")
library("ggplot2")
library("BiocParallel")
# limma differential expression for GSE87466.
# `exp_2` is samples x features at this point, so transpose it back.
df <- t(exp_2)
df <- df[which(rowSums(df) != 0), ]
head(df)

# Class labels by column position: the first 87 samples are "tumor" (the
# samples labelled "UC" in the PCA group table), the next 21 "normal".
sample_class <- factor(c(rep("tumor", 87), rep("normal", 21)),
                       levels = c("tumor", "normal"))
##--------------
head(sample_class)

# Cell-means design (no intercept): one column per class. Kept in `design`
# rather than a variable called `list`, which would mask base::list.
design <- model.matrix(~ 0 + sample_class)
colnames(design) <- levels(sample_class)
df.fit <- lmFit(df, design)

df.matrix <- makeContrasts(tumor - normal, levels = design)
fit <- contrasts.fit(df.fit, df.matrix)
fit <- eBayes(fit)
tempOutput <- topTable(fit, number = Inf, adjust.method = "fdr")
nrDEG <- na.omit(tempOutput)
diffsig <- nrDEG
write.table(diffsig, "limmaOut.txt",
            row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
# Significant set: raw P.Value < 0.05 and |logFC| > 1.
diffsig_out <- diffsig[diffsig$P.Value < 0.05 & abs(diffsig$logFC) > 1, ]
write.table(diffsig_out, "sig_limmaOut.txt",
            row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
diffsig_out <- read.table("sig_limmaOut.txt", sep = "\t",
                          header = TRUE, row.names = 1)
library(ggplot2)
library(ggpubr)
library(ggthemes)
#install.packages("ggthemes")
# Volcano plot of the limma table `diffsig`: logFC on x, -log10(P.Value) on y.
DEG <- diffsig
deg.data <- DEG[complete.cases(DEG), ]
deg.data$logP <- -log10(deg.data$P.Value)

# Classify every row with the same thresholds used for the significant set.
p_below_cutoff <- deg.data$P.Value < 0.05
deg.data$DE_genes <- "not significant"
deg.data$DE_genes[which(p_below_cutoff & (deg.data$logFC > 1))] <- "up"
deg.data$DE_genes[which(p_below_cutoff & (deg.data$logFC < (-1)))] <- "down"

table(deg.data$DE_genes)

# Label the five largest and five smallest logFC rows; all others stay "".
deg.data$Label <- ""
deg.data$P.Value <- as.vector(deg.data$P.Value)
deg.data <- deg.data[order(deg.data$P.Value), ]
top5_gene <- rownames(deg.data)[order(-deg.data$logFC)][1:5]
bottom5_gene <- rownames(deg.data)[order(deg.data$logFC)][1:5]
deg.3genes <- c(bottom5_gene, top5_gene)
deg.data$Label[match(deg.3genes, rownames(deg.data))] <- deg.3genes

options(ggrepel.max.overlaps = Inf)
volcano <- ggscatter(
  deg.data,
  x = "logFC",
  y = "logP",
  color = "DE_genes",
  palette = c("#2f5688", "black", "#CC0000"),
  size = 1,
  label = deg.data$Label,
  font.label = 8,
  label.rectangle = TRUE,
  label.select = deg.data$Label,
  repel = TRUE,
  show.legend.text = FALSE,
  xlab = "logFC",
  ylab = "-log10P"
) +
  theme_base() +
  # Guide lines at the cut-offs: 1.3 is roughly -log10(0.05); +/-1 on logFC.
  geom_hline(yintercept = 1.3, linetype = "dashed", colour = "#999999") +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", colour = "#999999") +
  xlim(-6, 6) +
  ylim(0, 50)

volcano
# Heat map of the significant rows. `exp_2` was transposed to
# samples x features before the PCA, so the rows are taken from `df`
# (features x samples), as the GSE66407 section does. drop = FALSE keeps a
# matrix when only one row is significant.
df <- df[rownames(diffsig_out), , drop = FALSE]
library(pheatmap)
pheatmap(df, scale = "row", show_rownames = FALSE, show_colnames = FALSE,
         cluster_rows = FALSE, cluster_cols = FALSE)

# Column annotation: one-column data frame of group labels whose row names
# must equal the heat map's column names.
lables <- group[, 2, drop = FALSE]
rownames(lables) <- colnames(df)

groupcolor <- c("coral", "aquamarine")
names(groupcolor) <- c("UC", "Normal")

ann_colors <- list(group = groupcolor)
# ,age=Agecolor,gender=Sexcolor

pheatmap(df, cluster_rows = FALSE, cluster_cols = FALSE,
         show_colnames = FALSE, border_color = NA, show_rownames = FALSE,
         scale = "row",
         annotation_col = lables, annotation_colors = ann_colors)



####GSE66407--------
# Placeholder path: set this to the directory holding the GSE66407 inputs.
setwd("")
exp <- as.matrix(
  read.table("GSE66407_series_matrix.txt", sep = "\t", row.names = 1)
)
# Row 1 carries the per-sample label; reorder columns as UC first, then Control.
table(exp[1, ])
exp_Y <- exp[, which(exp[1, ] == "UC")]
exp_N <- exp[, which(exp[1, ] == "Control")]
result <- cbind(exp_Y, exp_N)
write.table(result, "GSE66407_exp_sort.txt", quote = FALSE, sep = "\t")
library(msigdbr)
library(tidyverse)
library(clusterProfiler)
# NOTE(review): the code below expects an `ID_REF` column in this file;
# confirm the file on disk actually has that header before running.
count <- read.table("GSE66407_exp_sort.txt", header = TRUE, sep = "\t")
# Map Ensembl IDs to gene symbols.
name <- bitr(count$ID_REF,
             fromType = "ENSEMBL",
             toType = "SYMBOL",
             OrgDb = "org.Hs.eg.db")
count2 <- right_join(name, count, by = c("ENSEMBL" = "ID_REF"))

# Attach symbols, then average all rows that share a symbol.
countf1 <- left_join(name, count, by = c("ENSEMBL" = "ID_REF"))
value_cols <- countf1[, 3:ncol(countf1)]
countf1 <- aggregate(x = value_cols,
                     by = list(symbol = countf1$SYMBOL),
                     FUN = mean)
countf1 <- column_to_rownames(countf1, var = "symbol")

write.table(countf1, "GSE66407_exp.txt",
            row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")


library(ggplot2)
library(ggbiplot)
# PCA of the GSE66407 symbol-level table written by the preparation step.
exp_2 <- read.table("GSE66407_exp.txt", sep = "\t",
                    header = TRUE, row.names = 1)
# Group labels by column position: first 161 columns "UC", next 99 "Normal".
# NOTE(review): this assumes the file's columns are already in that order.
group <- data.frame(sample = colnames(exp_2),
                    group = c(rep("UC", 161), rep("Normal", 99)))
rowwilcox <- as.matrix(rowSums(exp_2))
exp_2 <- subset(exp_2, rowwilcox != 0)  # drop rows whose total is zero
exp_2 <- t(exp_2)                        # samples in rows for prcomp()

# The call must be closed here so that the statements after it parse as
# separate top-level expressions.
pca_result <- prcomp(exp_2, scale. = TRUE)

ggbiplot(pca_result,
         var.axes = FALSE,
         obs.scale = 1,
         groups = group[, 2],
         ellipse = TRUE,
         circle = FALSE)
# geom_text(
#   aes(label=rownames(exp_2)),
#   vjust=1.5,
#   size=2
# )
                     
                     
                     #DEG
                     library(limma)
                     library(dplyr)
                     library(DESeq2)
                     library("amap")
                     library("ggplot2")
                     library("BiocParallel")
                     # limma differential expression for GSE66407.
                     # `exp_2` is samples x features here, so transpose it back.
                     df <- t(exp_2)
                     df <- df[which(rowSums(df) != 0), ]
                     head(df)

                     # Class labels by column position: first 161 samples "tumor"
                     # (labelled "UC" in the PCA group table), next 99 "normal".
                     sample_class <- factor(c(rep("tumor", 161), rep("normal", 99)),
                                            levels = c("tumor", "normal"))
                     ##--------------
                     head(sample_class)

                     # Cell-means design (no intercept). Kept in `design` rather
                     # than a variable called `list`, which would mask base::list.
                     design <- model.matrix(~ 0 + sample_class)
                     colnames(design) <- levels(sample_class)
                     df.fit <- lmFit(df, design)

                     df.matrix <- makeContrasts(tumor - normal, levels = design)
                     fit <- contrasts.fit(df.fit, df.matrix)
                     fit <- eBayes(fit)
                     tempOutput <- topTable(fit, number = Inf, adjust.method = "fdr")
                     nrDEG <- na.omit(tempOutput)
                     diffsig <- nrDEG
                     # NOTE(review): same output file names as the GSE87466 section;
                     # they are overwritten if both sections share a working directory.
                     write.table(diffsig, "limmaOut.txt",
                                 row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
                     # Significant set: raw P.Value < 0.05 and |logFC| > 1.
                     diffsig_out <- diffsig[diffsig$P.Value < 0.05 & abs(diffsig$logFC) > 1, ]
                     write.table(diffsig_out, "sig_limmaOut.txt",
                                 row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
                     
                     library(ggplot2)
                     library(ggpubr)
                     library(ggthemes)
                     #install.packages("ggthemes")
                     # Volcano plot of the limma table `diffsig`:
                     # logFC on x, -log10(P.Value) on y.
                     DEG <- diffsig
                     deg.data <- DEG[complete.cases(DEG), ]
                     deg.data$logP <- -log10(deg.data$P.Value)

                     # Classify rows with the thresholds used for the significant set.
                     p_below_cutoff <- deg.data$P.Value < 0.05
                     deg.data$DE_genes <- "not significant"
                     deg.data$DE_genes[which(p_below_cutoff & (deg.data$logFC > 1))] <- "up"
                     deg.data$DE_genes[which(p_below_cutoff & (deg.data$logFC < (-1)))] <- "down"

                     table(deg.data$DE_genes)

                     # Label the five largest and five smallest logFC rows.
                     deg.data$Label <- ""
                     deg.data$P.Value <- as.vector(deg.data$P.Value)
                     deg.data <- deg.data[order(deg.data$P.Value), ]
                     top5_gene <- rownames(deg.data)[order(-deg.data$logFC)][1:5]
                     bottom5_gene <- rownames(deg.data)[order(deg.data$logFC)][1:5]
                     deg.3genes <- c(bottom5_gene, top5_gene)
                     deg.data$Label[match(deg.3genes, rownames(deg.data))] <- deg.3genes

                     options(ggrepel.max.overlaps = Inf)
                     volcano <- ggscatter(
                       deg.data,
                       x = "logFC",
                       y = "logP",
                       color = "DE_genes",
                       palette = c("#2f5688", "black", "#CC0000"),
                       size = 1,
                       label = deg.data$Label,
                       font.label = 8,
                       label.rectangle = TRUE,
                       label.select = deg.data$Label,
                       repel = TRUE,
                       show.legend.text = FALSE,
                       xlab = "logFC",
                       ylab = "-log10P"
                     ) +
                       theme_base() +
                       # Guide lines at the cut-offs: 1.3 is roughly -log10(0.05).
                       geom_hline(yintercept = 1.3, linetype = "dashed", colour = "#999999") +
                       geom_vline(xintercept = c(-1, 1), linetype = "dashed", colour = "#999999") +
                       xlim(-3, 3) +
                       ylim(0, 30)

                     volcano
                     # Heat map of the significant rows of `df` (features x samples).
                     # drop = FALSE keeps a matrix when only one row is significant.
                     df <- df[rownames(diffsig_out), , drop = FALSE]
                     library(pheatmap)
                     pheatmap(df, scale = "row", show_rownames = FALSE, show_colnames = FALSE,
                              cluster_rows = FALSE, cluster_cols = FALSE)

                     # Column annotation: one-column data frame of group labels whose
                     # row names must equal the heat map's column names.
                     lables <- group[, 2, drop = FALSE]
                     rownames(lables) <- colnames(df)

                     groupcolor <- c("coral", "aquamarine")
                     names(groupcolor) <- c("UC", "Normal")

                     ann_colors <- list(group = groupcolor)
                     # ,age=Agecolor,gender=Sexcolor

                     pheatmap(df, cluster_rows = FALSE, cluster_cols = FALSE,
                              show_colnames = FALSE, border_color = NA, show_rownames = FALSE,
                              scale = "row",
                              annotation_col = lables, annotation_colors = ann_colors)
                     
                     
                     ####GSE75214--------
                     # Placeholder path: set this to the directory holding the GSE75214 inputs.
                     setwd("")
                     exp <- as.matrix(read.table("GSE75214_series_matrix.txt", sep = "\t",
                                                 row.names = 1, check.names = FALSE))
                     # Row 1 carries the per-sample label; reorder columns as
                     # ulcerative_colitis first, then control.
                     table(exp[1, ])
                     exp_Y <- exp[, which(exp[1, ] == "ulcerative_colitis")]
                     exp_N <- exp[, which(exp[1, ] == "control")]
                     result <- cbind(exp_Y, exp_N)
                     write.table(result, "GSE75214_exp_sort.txt", quote = FALSE, sep = "\t")

                     # Gene list (column 1 used) and platform annotation table
                     # (column 1 matched to the expression table, column 2 to the gene list).
                     gene <- as.matrix(read.table("gene.txt", sep = "\t"))
                     probe_gene <- as.matrix(read.table("GPL6244-17930.txt", sep = "\t",
                                                        check.names = FALSE))

                     # NOTE(review): the file written above has a header line one field
                     # shorter than its data rows. read.table() documents that `header`
                     # then defaults to TRUE and the first column becomes the row names,
                     # which would leave exp[, 1] holding values rather than probe IDs.
                     # Confirm the on-disk layout before relying on the match below.
                     exp <- as.matrix(read.table("GSE75214_exp_sort.txt", sep = "\t"))

                     # drop = FALSE keeps a matrix even when zero or one row survives.
                     probe_gene <- probe_gene[probe_gene[, 1] %in% exp[, 1], , drop = FALSE]
                     probe <- probe_gene[probe_gene[, 2] %in% gene[, 1], , drop = FALSE]

                     # One vectorised lookup per retained probe (first match wins if an
                     # ID is repeated). unname() keeps the appended columns unnamed.
                     row_idx <- match(probe[, 1], exp[, 1])
                     probe_exp <- unname(exp[row_idx, , drop = FALSE])

                     result <- cbind(probe, probe_exp)
                     write.table(result, "GSE75214_exp.txt",
                                 row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
                     
                     library(ggplot2)
                     library(ggbiplot)
                     # PCA of the table read from "GSE75214_exp_mean.txt"
                     # (rows = features, columns = samples).
                     exp_2 <- read.table("GSE75214_exp_mean.txt", sep = "\t",
                                         header = TRUE, row.names = 1)
                     # Group labels by column position: first 97 "UC", next 22 "Normal".
                     group <- data.frame(sample = colnames(exp_2),
                                         group = c(rep("UC", 97), rep("Normal", 22)))
                     rowwilcox <- as.matrix(rowSums(exp_2))
                     exp_2 <- subset(exp_2, rowwilcox != 0)  # drop zero-total rows
                     exp_2 <- t(exp_2)                        # samples in rows

                     pca_result <- prcomp(exp_2, scale. = TRUE)

                     ggbiplot(pca_result,
                              var.axes = FALSE,
                              obs.scale = 1,
                              groups = group[, 2],
                              ellipse = TRUE,
                              circle = FALSE)
                     
                     library(WGCNA)
                     # Build the WGCNA input: samples x features, restricted to the
                     # features whose variance lies above the 75th percentile.
                     exprSet <- read.table("GSE75214_exp_mean.txt", header = TRUE,
                                           sep = "\t", row.names = 1)
                     exprSet <- exprSet[which(rowSums(exprSet) != 0), ]
                     WGCNA0 <- t(exprSet)
                     vars_res <- apply(WGCNA0, 2, var)
                     per_res <- quantile(vars_res, probs = seq(0, 1, 0.25))
                     per_res
                     upperGene <- WGCNA0[, which(vars_res > per_res[4])]  # per_res[4] = 75%
                     dim(upperGene)
                     WGCNA1 <- data.matrix(upperGene)
                     # Dimensions are taken from the final samples x features matrix.
                     nGenes <- ncol(WGCNA1)
                     nSamples <- nrow(WGCNA1)
                     head(WGCNA1[1:6, 1:6])

                     # Trait table: group label by column position (97 "UC", 22 "Normal"),
                     # keyed by sample name so it can be checked against rownames(WGCNA1).
                     group <- data.frame(sample = colnames(exprSet),
                                         group = c(rep("UC", 97), rep("Normal", 22)))
                     rownames(group) <- group$sample
                     group <- group[, 2, drop = FALSE]
                     write.table(group, "WGCNA_group.txt",
                                 row.names = TRUE, col.names = TRUE, sep = "\t", quote = FALSE)

                     datTraits <- read.table("WGCNA_group.txt", header = TRUE,
                                             sep = "\t", row.names = 1)
                     library(stringr)
                     datTraits <- as.data.frame(datTraits)
                     head(datTraits)

                     # Missing-data check; only the overall flag is printed here.
                     gsg <- goodSamplesGenes(WGCNA1, verbose = 3)
                     gsg[["allOK"]]

                     # Sample clustering with a colour bar for the group label.
                     sampleTree <- hclust(dist(WGCNA1), method = "average")
                     traitColors <- numbers2colors(as.numeric(factor(datTraits$group)),
                                                   colors = rainbow(length(table(datTraits$group))),
                                                   signed = FALSE)
                     plotDendroAndColors(sampleTree,
                                         traitColors,
                                         groupLabels = names(datTraits),
                                         main = "Sample dendrogram and trait heatmap")
                     
                     # Soft-threshold selection: candidate powers 1-10 and 12-30 (step 2).
                     powers <- c(1:10, seq(from = 12, to = 30, by = 2))
                     # NOTE(review): the threshold is picked for a "signed" network while
                     # `type` below and the later TOMType are "unsigned" - confirm intent.
                     sft <- pickSoftThreshold(as.matrix(WGCNA1),
                                              powerVector = powers,
                                              networkType = "signed")
                     power <- sft$powerEstimate
                     type <- "unsigned"
                     # Fallback when no power is estimated: fixed values chosen by
                     # sample count and network type.
                     if (is.na(power)) {
                       is_unsigned <- type == "unsigned"
                       if (nSamples < 20) {
                         power <- if (is_unsigned) 9 else 18
                       } else if (nSamples < 30) {
                         power <- if (is_unsigned) 8 else 16
                       } else if (nSamples < 40) {
                         power <- if (is_unsigned) 7 else 14
                       } else {
                         power <- if (is_unsigned) 6 else 12
                       }
                     }

                     # powerEstimate = sft$powerEstimate
                     powerEstimate <- power
                     par(mfrow = c(1, 2))
                     cex1 <- 0.9

                     # Panel 1: scale-free fit index per power. Base graphics calls are
                     # issued one after another; they are not composed with `+`.
                     plot(sft$fitIndices[, 1],
                          -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
                          xlab = "Soft Threshold (power)",
                          ylab = "Scale Free Topology Model Fit,signed R^2",
                          type = "n",
                          main = paste("Scale independence"))
                     text(sft$fitIndices[, 1],
                          -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
                          labels = powers,
                          cex = cex1,
                          col = "steelblue")
                     abline(h = 0.8, col = "red")

                     # Panel 2: mean connectivity per power.
                     plot(sft$fitIndices[, 1],
                          sft$fitIndices[, 5],
                          xlab = "Soft Threshold (power)",
                          ylab = "Mean Connectivity",
                          type = "n",
                          main = paste("Mean connectivity"))
                     text(sft$fitIndices[, 1],
                          sft$fitIndices[, 5],
                          labels = powers,
                          cex = cex1,
                          col = "steelblue")
                     enableWGCNAThreads()
                     # One-block module detection at the selected power; the module size
                     # table is printed as the value of the block.
                     if (TRUE) {
                       net <- blockwiseModules(
                         as.matrix(WGCNA1),
                         power = powerEstimate,
                         maxBlockSize = nGenes,
                         TOMType = "unsigned",
                         minModuleSize = 30,
                         reassignThreshold = 0,
                         mergeCutHeight = 0.25,
                         numericLabels = TRUE,
                         pamRespectsDendro = FALSE,
                         saveTOMs = FALSE,
                         verbose = 3
                       )
                       table(net$colors)
                     }
                     # Convert numeric module labels to colours and draw the dendrogram
                     # of the first block with its module colour bar.
                     if (TRUE) {
                       moduleColors <- labels2colors(net$colors)
                       table(moduleColors)

                       first_block <- net$blockGenes[[1]]
                       plotDendroAndColors(net$dendrograms[[1]],
                                           moduleColors[first_block],
                                           "Module colors",
                                           dendroLabels = FALSE,
                                           hang = 0.03,
                                           addGuide = TRUE,
                                           guideHang = 0.05)
                     }
                     
                     library(WGCNA)
                     library(forcats)
                     datTraits$group <- factor(datTraits$group)

                     levels(datTraits$group)
                     # Module-trait heat map: correlate module eigengenes with one
                     # indicator column per group level.
                     if (TRUE) {
                       nGenes <- ncol(WGCNA1)
                       nSamples <- nrow(WGCNA1)
                       # These two assignments must be separate statements.
                       design <- model.matrix(~ 0 + datTraits$group)
                       colnames(design) <- levels(datTraits$group)
                       MES0 <- moduleEigengenes(WGCNA1, moduleColors)$eigengenes
                       MEs <- orderMEs(MES0)
                       moduleTraitCor <- cor(MEs, design, use = "p")
                       moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
                       # Cell text: correlation with its p-value in brackets underneath.
                       textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
                                           signif(moduleTraitPvalue, 1), ")", sep = "")
                       dim(textMatrix) <- dim(moduleTraitCor)
                       par(mar = c(6, 8.5, 3, 3))
                       labeledHeatmap(Matrix = moduleTraitCor,
                                      xLabels = colnames(design),
                                      yLabels = names(MEs),
                                      ySymbols = names(MEs),
                                      colorLabels = FALSE,
                                      colors = blueWhiteRed(50),
                                      textMatrix = textMatrix,
                                      setStdMargins = FALSE,
                                      cex.text = 0.5,
                                      zlim = c(-1, 1),
                                      main = paste("Module-trait relationships"))
                     }
                     # Module membership (MM) versus gene significance (GS) scatter plots
                     # for the modules listed in `selectModule`.
                     if (TRUE) {
                       # Module names are the eigengene names without the 2-char prefix.
                       modNames <- substring(names(MEs), 3)
                       geneModuleMembership <- as.data.frame(
                         cor(WGCNA1, MEs, use = "p", method = "spearman")
                       )
                       MMPvalue <- as.data.frame(
                         corPvalueStudent(as.matrix(geneModuleMembership), nSamples)
                       )
                       names(geneModuleMembership) <- paste("MM", modNames, sep = "")
                       names(MMPvalue) <- paste("p.MM", modNames, sep = "")

                       # `datTraits` only has a `group` column, so the trait is encoded
                       # numerically here (factor level order). Passing a missing column
                       # to cor() would silently return feature-by-feature correlations.
                       # NOTE(review): confirm this encoding matches the intended trait.
                       trait_num <- as.numeric(factor(datTraits$group))
                       geneTraitSignificance <- as.data.frame(
                         cor(WGCNA1, trait_num, use = "p")
                       )
                       GSPvalue <- as.data.frame(
                         corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)
                       )
                       names(geneTraitSignificance) <- "GS.group"
                       names(GSPvalue) <- "p.GS.group"

                       selectModule <- c("brown")
                       #selectModule <- modNames

                       par(mfrow = c(ceiling(length(selectModule) / 2), 1))
                       for (module in selectModule) {
                         # Column of this module in the membership table; looked up in
                         # modNames so it follows the eigengene column order.
                         column <- match(module, modNames)
                         if (is.na(column)) {
                           warning("module '", module, "' not found; skipped", call. = FALSE)
                           next
                         }
                         print(module)
                         moduleGenes <- moduleColors == module
                         verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                                            abs(geneTraitSignificance[moduleGenes, 1]),
                                            xlab = paste("Module Membership in", module, "module"),
                                            ylab = paste("Gene significance for", module, "module"),
                                            main = paste("Module membership vs. gene significance\n"),
                                            cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2,
                                            col = module)
                       }
                     }
                     # TOM heat map on a random subset of features.
                     if (TRUE) {
                       geneTree <- net$dendrograms[[1]]
                       # Use the same soft-threshold power as the module detection so the
                       # TOM matches the network the modules came from.
                       # NOTE(review): this was a fixed power of 20 - confirm that
                       # following `powerEstimate` is what is wanted.
                       TOM <- TOMsimilarityFromExpr(WGCNA1, power = powerEstimate)
                       dissTOM <- 1 - TOM

                       # Sample at most 200 features (fewer if the matrix is smaller).
                       nSelect <- min(200, nGenes)
                       set.seed(10)
                       select <- sample(nGenes, size = nSelect)
                       selectTOM <- dissTOM[select, select]
                       selectTree <- hclust(as.dist(selectTOM), method = "average")
                       selectColors <- moduleColors[select]
                       # Raise to a power to increase contrast; blank the diagonal.
                       plotDiss <- selectTOM^7
                       diag(plotDiss) <- NA
                       TOMplot(plotDiss, selectTree, selectColors,
                               main = "Network heatmap of selected gene")
                     }
                     # Eigengene network (dendrogram and adjacency heat map) with the
                     # group trait added as an extra column.
                     if (TRUE) {
                       MEs <- moduleEigengenes(WGCNA1, moduleColors)$eigengenes
                       # `datTraits` has no `groupNo` column; binding that missing column
                       # would silently drop the trait. Encode `group` numerically instead
                       # (factor level order).
                       # NOTE(review): confirm this encoding matches the intended trait.
                       trait_num <- as.numeric(factor(datTraits$group))
                       MET <- orderMEs(cbind(MEs, group = trait_num))
                       par(cex = 0.9)
                       #png("step6-Eigengene-dendrogram.png",width = 800,height = 600)
                       plotEigengeneNetworks(MET, "",
                                             marDendro = c(0, 4, 1, 2),
                                             marHeatmap = c(3, 4, 1, 2),
                                             cex.lab = 0.8,
                                             xLabelsAngle = 90,
                                             excludeGrey = FALSE)
                     }
                     # Export the TOM sub-network of one module for VisANT and Cytoscape.
                     if (TRUE) {
                       module <- "red"
                       probes <- colnames(WGCNA1)
                       inModule <- (moduleColors == module)
                       modProbes <- probes[inModule]
                       head(modProbes)
                       # These two assignments must be separate statements.
                       modTOM <- TOM[inModule, inModule]
                       dimnames(modTOM) <- list(modProbes, modProbes)

                       # Keep the nTop most connected members of the module.
                       nTop <- 523
                       IMConn <- softConnectivity(WGCNA1[, modProbes])
                       top <- (rank(-IMConn) <= nTop)
                       filterTOM <- modTOM[top, top]
                       # for visANT
                       vis <- exportNetworkToVisANT(
                         filterTOM,
                         file = paste("visANTinput-", module, ".txt", sep = ""),
                         weighted = TRUE,
                         threshold = 0
                       )
                       # for cytoscape
                       cyt <- exportNetworkToCytoscape(
                         filterTOM,
                         edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse = "-"), ".txt", sep = ""),
                         nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse = "-"), ".txt", sep = ""),
                         weighted = TRUE,
                         threshold = 0.02,
                         nodeNames = modProbes[top],
                         nodeAttr = moduleColors[inModule][top]
                       )
                     }
                     
                     
                     
                     
                     ####validation--------
                     # Placeholder path: set this to the directory holding the GSE92415 inputs.
                     setwd("")

                     # Gene list (column 1 used) and platform annotation table
                     # (column 1 matched to the expression table, column 2 to the gene list).
                     gene <- as.matrix(read.table("gene.txt", sep = "\t"))
                     probe_gene <- as.matrix(read.table("GPL13158-5065.txt", sep = "\t"))

                     exp <- as.matrix(read.table("GSE92415_series_matrix.txt", sep = "\t"))

                     # drop = FALSE keeps a matrix even when zero or one row survives.
                     probe_gene <- probe_gene[probe_gene[, 1] %in% exp[, 1], , drop = FALSE]
                     probe <- probe_gene[probe_gene[, 2] %in% gene[, 1], , drop = FALSE]

                     # One vectorised lookup per retained probe (first match wins if an
                     # ID is repeated). unname() keeps the appended columns unnamed.
                     row_idx <- match(probe[, 1], exp[, 1])
                     probe_exp <- unname(exp[row_idx, , drop = FALSE])

                     result <- cbind(probe, probe_exp)
                     write.table(result, "GSE92415_exp.txt",
                                 row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
                     
                     library(ggplot2)
                     library(ggbiplot)
                     library(survival)
                     library(survminer)
                     # Cox model on CASP1/CASP4 plus the columns of meta.txt, followed
                     # by time-dependent ROC curves of the resulting risk score.
                     exp_2 <- read.table("GSE92415_exp_mean.txt", sep = "\t",
                                         header = TRUE, row.names = 1)
                     meta_model <- read.table("meta.txt", sep = "\t",
                                              header = TRUE, row.names = 1)
                     # Two key genes, samples ordered as in the metadata table.
                     key.gene_exp <- exp_2[c("CASP1", "CASP4"), rownames(meta_model)]

                     merge_data <- cbind(t(key.gene_exp), meta_model)
                     # `score` is used as the time variable and `response` as the event.
                     cox_model <- coxph(Surv(score, response) ~ ., data = merge_data)
                     coeficients <- summary(cox_model)$coefficients
                     merge_data$risk_scores <- predict(cox_model,
                                                       newdata = merge_data,
                                                       type = "risk")
                     library(survival)
                     library(survminer)
                     library(timeROC)


                     roc_times <- c(1, 3, 5)
                     roc_cols <- c("green", "blue", "red")
                     ROC_rt <- timeROC(T = merge_data$score,
                                       delta = merge_data$response,
                                       marker = merge_data$risk_scores,
                                       cause = 1,
                                       weighting = "aalen",
                                       times = roc_times,
                                       ROC = TRUE)
                     # First curve opens the plot; the remaining ones are overlaid.
                     plot(ROC_rt, time = roc_times[1], col = roc_cols[1],
                          title = FALSE, lwd = 2)
                     for (k in 2:3) {
                       plot(ROC_rt, time = roc_times[k], col = roc_cols[k],
                            add = TRUE, title = FALSE, lwd = 2)
                     }
                     legend("bottomright",
                            paste0("AUC at ", roc_times, " : ",
                                   sprintf("%.03f", ROC_rt$AUC[1:3])),
                            col = roc_cols, lwd = 2, bty = "n")
                     
                     library(rms)
                     # Nomogram and calibration curve for a Cox model on the first two
                     # columns of `merge_data`.
                     vl <- colnames(merge_data)[1:2]

                     formula_str <- paste(vl, collapse = "+")
                     formula_obj <- as.formula(
                       paste("Surv(time = score, event = response) ~", formula_str)
                     )
                     # rms needs the predictor distributions registered via options().
                     dd <- datadist(merge_data)
                     options(datadist = "dd")
                     f <- cph(formula_obj, x = TRUE, y = TRUE, surv = TRUE,
                              data = merge_data, time.inc = 1)
                     surv <- Survival(f)
                     # Build the nomogram
                     nom <- nomogram(
                       f,
                       fun = list(function(x) surv(1, x),
                                  function(x) surv(3, x),
                                  function(x) surv(5, x)),
                       lp = FALSE,
                       funlabel = c("1-year survival", "3-year survival", "5-year survival"),
                       maxscale = 100,
                       fun.at = c(0.99, 0.9, 0.8, 0.7, 0.5, 0.3, 0.1, 0.01)
                     )

                     plot(nom)


                     # Prediction horizon. The original comment called this a number of
                     # years, while the axis labels below say months.
                     # NOTE(review): confirm the time unit.
                     time <- 6
                     f <- cph(formula_obj, x = TRUE, y = TRUE, surv = TRUE,
                              data = merge_data, time.inc = time)
                     cal <- calibrate(f, cmethod = "KM", method = "boot",
                                      u = time, m = 100, B = 500)
                     plot(cal,
                          xlim = c(0, 1),
                          ylim = c(0, 1),
                          xlab = paste0("Nomogram-Predicted Probability of ", time, "-Month OS"),
                          ylab = paste0("Actual ", time, "-Month OS(proportion)"),
                          lwd = 1.5,
                          col = "red",
                          sub = TRUE)
                     
                     
                     
                     #########################KM curve-----------------
                     # Split samples into high/low risk at the cut point chosen by
                     # surv_cutpoint() and compare the two groups.
                     validation <- merge_data


                     res.cut <- surv_cutpoint(validation, time = "score", event = "response",
                                              variables = c("risk_scores"))
                     res.cut
                     risk_cut <- res.cut[["cutpoint"]][["cutpoint"]]
                     validation$risk_group <- as.vector(
                       ifelse(validation$risk_scores > risk_cut, "high", "low")
                     )

                     fit <- survfit(Surv(score, response) ~ risk_group, data = validation)
                     data_differ <- survdiff(Surv(score, response) ~ risk_group, data = validation)
                     # Ratio of observed/expected events, second group over first group.
                     # NOTE(review): the group order follows survdiff's output - confirm
                     # which group is the reference before reporting this value.
                     HR <- (data_differ$obs[2] / data_differ$exp[2]) /
                       (data_differ$obs[1] / data_differ$exp[1])
                     p <- ggsurvplot(fit, data = validation, conf.int = FALSE, pval = TRUE,
                                     xlab = "mayo score",
                                     risk.table = FALSE,
                                     pval.method = TRUE,
                                     ggtheme = theme_classic2(),
                                     surv.median.line = "hv",
                                     ncensor.plot = FALSE,
                                     palette = c("#D41F76", "#80C37D"),
                                     pval.size = 4)
                     # An assignment alone draws nothing, so display the plot explicitly.
                     print(p)
                     
                     library(ggplot2)
                     library(dcurves)
                     # Decision curve analysis comparing three single-predictor Cox
                     # models: the risk score, CASP1 and CASP4.
                     cox1 <- coxph(Surv(time = merge_data$score, event = merge_data$response) ~ risk_scores,
                                   data = merge_data)
                     cox2 <- coxph(Surv(time = merge_data$score, event = merge_data$response) ~ CASP1,
                                   data = merge_data)
                     cox3 <- coxph(Surv(time = merge_data$score, event = merge_data$response) ~ CASP4,
                                   data = merge_data)
                     # Predicted event probability (1 - survival) at time 10 per sample.
                     # NOTE(review): probabilities are taken at time 10 but dca() below is
                     # called with time = 100 - confirm which horizon is intended.
                     merge_data$prob1 <- as.vector(
                       1 - summary(survfit(cox1, newdata = merge_data), times = 10)$surv
                     )
                     merge_data$prob2 <- as.vector(
                       1 - summary(survfit(cox2, newdata = merge_data), times = 10)$surv
                     )
                     merge_data$prob3 <- as.vector(
                       1 - summary(survfit(cox3, newdata = merge_data), times = 10)$surv
                     )
                     dca_result <- dca(
                       Surv(time = merge_data$score, event = merge_data$response) ~ prob1 + prob2 + prob3,
                       data = merge_data,
                       time = 100
                     )
                     plot(dca_result, show_ggplot_code = TRUE)
                     
                     
                     # 1. LASSO (binomial glmnet) feature selection
                     library(survival)
                     library(glmnet)
                     library(ggplot2)
                     library(ggsci)
                     library(patchwork)
                     library(limma)

                     inputFile <- ".txt"  # expression table; first column holds the row identifiers
                     C <- "C"             # label in sample.txt that marks the first (coded "0") group

                     # Read the table, move column 1 into the row names and convert the rest
                     # into a numeric matrix.
                     rt <- read.table(inputFile, header = TRUE, sep = "\t", check.names = FALSE)
                     rt <- as.matrix(rt)
                     rownames(rt) <- rt[, 1]
                     exp <- rt[, 2:ncol(rt)]
                     dimnames <- list(rownames(exp), colnames(exp))
                     data <- matrix(as.numeric(as.matrix(exp)), nrow = nrow(exp), dimnames = dimnames)
                     data <- avereps(data)  # average rows that share the same identifier
                     data <- t(data)

                     # Keep only the columns listed in the first column of disease.txt, and the
                     # rows listed (in that order) as row names of sample.txt.
                     keep_cols <- read.table("disease.txt", header = FALSE, sep = "\t", check.names = FALSE)[, 1]
                     data <- data[, keep_cols]
                     sample <- read.table("sample.txt", sep = "\t", header = FALSE, check.names = FALSE, row.names = 1)
                     data <- data[rownames(sample), ]
                     x <- as.matrix(data)

                     # Labels are assigned by position: the first `afcon` rows get "0", the
                     # remaining rows get "1".
                     afcon <- sum(sample[, 1] == C)
                     group <- rep(c("0", "1"), times = c(afcon, nrow(data) - afcon))
                     group <- as.matrix(group)
                     rownames(group) <- rownames(data)
                     y <- as.matrix(group[, 1])

                     set.seed(123)
                     cvfit <- cv.glmnet(x, y, family = "binomial", nlambda = 100, alpha = 1, nfolds = 10)
                     fit <- glmnet(x, y, family = "binomial")
                     cvfit$lambda.min

                     # Non-zero coefficients of the full-path fit at the cross-validated lambda.min.
                     coef <- coef(fit, s = cvfit$lambda.min)
                     index <- which(coef != 0)
                     actCoef <- coef[index]
                     lassoGene <- row.names(coef)[index]
                     geneCoef <- cbind(Gene = lassoGene, Coef = actCoef)
                     write.table(geneCoef, file = "geneCoef.xls", sep = "\t", quote = FALSE, row.names = FALSE)
                     write.table(lassoGene, file = "lassoset.txt", sep = "\t", quote = FALSE,
                                 col.names = FALSE, row.names = FALSE)

                     # Two panels: coefficient paths and the cross-validation curve, the latter
                     # with dashed lines at log(lambda.min) and log(lambda.1se).
                     pdf("lasso.pdf", height = 5, width = 7)
                     layout(matrix(c(1, 1, 2, 2), 2, 2, byrow = FALSE))
                     plot(fit, xvar = "lambda")
                     plot(cvfit)
                     abline(v = log(c(cvfit$lambda.min, cvfit$lambda.1se)), lty = "dashed")
                     dev.off()
                     
                     # 2. Random forest feature ranking
                     library(randomForest)
                     library(limma)
                     library(ggpubr)
                     set.seed(123)

                     inputFile <- ".txt"  # expression table; first column holds the row identifiers
                     C <- "C"             # label in sample.txt that marks the first ("con") group

                     # Read the table, move column 1 into the row names and convert the rest
                     # into a numeric matrix.
                     rt <- read.table(inputFile, header = TRUE, sep = "\t", check.names = FALSE)
                     rt <- as.matrix(rt)
                     rownames(rt) <- rt[, 1]
                     exp <- rt[, 2:ncol(rt)]
                     dimnames <- list(rownames(exp), colnames(exp))
                     data <- matrix(as.numeric(as.matrix(exp)), nrow = nrow(exp), dimnames = dimnames)
                     data <- avereps(data)  # average rows that share the same identifier
                     data <- t(data)
                     data <- data[, read.table("disease.txt", header = FALSE, sep = "\t", check.names = FALSE)[, 1]]
                     sample <- read.table("sample.txt", sep = "\t", header = FALSE, check.names = FALSE, row.names = 1)
                     data <- data[rownames(sample), ]

                     # Hyphens are replaced by the placeholder "afaf" in the column names before
                     # the formula interface is used; they are restored further down.
                     colnames(data) <- gsub("-", "afaf", colnames(data))

                     # Labels are assigned by position: first `afcon` rows "con", the rest "treat".
                     afcon <- sum(sample[, 1] == C)
                     group <- c(rep("con", afcon), rep("treat", nrow(data) - afcon))

                     rf <- randomForest(as.factor(group) ~ ., data = data, ntree = 500)
                     pdf(file = "forest.pdf", width = 6, height = 6)
                     plot(rf, main = "Random forest", lwd = 2)
                     dev.off()

                     # Refit with the number of trees that minimises the first err.rate column.
                     optionTrees <- which.min(rf$err.rate[, 1])
                     optionTrees
                     rf2 <- randomForest(as.factor(group) ~ ., data = data, ntree = optionTrees)

                     importance <- importance(x = rf2)
                     importance <- as.data.frame(importance)
                     # Fix: the previous code ran gsub() on the not-yet-existing column
                     # `importance$size`, which yields character(0) and makes the data-frame
                     # assignment fail. Take the names from the row names instead and turn the
                     # "afaf" placeholder back into "-".
                     importance$size <- gsub("afaf", "-", rownames(importance))
                     importance <- importance[, c(2, 1)]
                     names(importance) <- c("Gene", "importance")

                     # Plot at most the 20 most important genes; head() avoids the all-NA rows
                     # that `af[1:20, ]` produces when fewer than 20 genes are available.
                     af <- importance[order(importance$importance, decreasing = TRUE), ]
                     af <- head(af, 20)
                     p <- ggdotchart(af, x = "Gene", y = "importance",
                                     color = "importance",
                                     sorting = "descending",
                                     add = "segments",
                                     add.params = list(color = "lightgray", size = 2),
                                     dot.size = 6,
                                     font.label = list(color = "white", size = 9,
                                                       vjust = 0.5),
                                     ggtheme = theme_bw(),
                                     rotate = TRUE)
                     p1 <- p + geom_hline(yintercept = 0, linetype = 2, color = "lightgray") +
                       gradient_color(palette = c(ggsci::pal_npg()(2)[2], ggsci::pal_npg()(2)[1])) +
                       grids()
                     pdf(file = "importance.pdf", width = 6, height = 6)
                     print(p1)
                     dev.off()

                     # Full ranking, most important first.
                     rfGenes <- importance[order(importance[, "importance"], decreasing = TRUE), ]
                     write.table(rfGenes, file = "rfGenes.xls", sep = "\t", quote = FALSE,
                                 col.names = TRUE, row.names = FALSE)
                     
                     # 3. SVM-RFE feature ranking (helper functions come from msvmRFE.R)
                     library(tidyverse)
                     library(glmnet)
                     source('msvmRFE.R')
                     library(VennDiagram)
                     library(sigFeature)
                     library(e1071)
                     library(caret)
                     library(randomForest)
                     library(limma)

                     inputFile <- ".txt"  # expression table; first column holds the row identifiers
                     C <- "C"             # label in sample.txt that marks the first (coded 0) group

                     # Read the table, move column 1 into the row names and convert the rest
                     # into a numeric matrix.
                     rt <- read.table(inputFile, header = TRUE, sep = "\t", check.names = FALSE)
                     rt <- as.matrix(rt)
                     rownames(rt) <- rt[, 1]
                     exp <- rt[, 2:ncol(rt)]
                     dimnames <- list(rownames(exp), colnames(exp))
                     data <- matrix(as.numeric(as.matrix(exp)), nrow = nrow(exp), dimnames = dimnames)
                     data <- avereps(data)  # average rows that share the same identifier
                     data <- t(data)
                     data <- data[, read.table("disease.txt", header = FALSE, sep = "\t", check.names = FALSE)[, 1]]
                     sample <- read.table("sample.txt", sep = "\t", header = FALSE, check.names = FALSE, row.names = 1)
                     data <- data[rownames(sample), ]

                     # Labels are assigned by position: first `afcon` rows 0, the rest 1.
                     afcon <- sum(sample[, 1] == C)
                     group <- c(rep("0", afcon), rep("1", nrow(data) - afcon))
                     group <- as.matrix(as.numeric(group))
                     rownames(group) <- rownames(data)
                     colnames(group) <- "Type"

                     # The class label goes in the first column, followed by the features.
                     input <- as.data.frame(cbind(group, data))
                     input$Type <- as.factor(input$Type)

                     # Ranking on the full data set; the result is not stored.
                     svmRFE(input, k = 10, halve.above = 100)

                     # Random assignment of the rows to `nfold` folds.
                     # `sample` was rebound to a data frame above; the call below still reaches
                     # base::sample() because R skips non-function objects when resolving a call.
                     nfold <- 10
                     nrows <- nrow(input)
                     folds <- rep(1:nfold, length.out = nrows)[sample(nrows)]
                     folds <- lapply(1:nfold, function(x) which(folds == x))
                     results <- lapply(folds, svmRFE.wrap, input, k = 10, halve.above = 100)

                     # Fix: these two statements were previously on one line without a separator,
                     # which is a syntax error.
                     top.features <- WriteFeatures(results, input, save = FALSE)
                     head(top.features)
                     write.csv(top.features, "feature_svm.csv")

                     # NOTE(review): `X` (the number of top features to sweep) is not defined
                     # anywhere in this section -- it must be set before this line runs.
                     featsweep <- lapply(1:X, FeatSweep.wrap, results, input)

                     # Proportion of the smaller class in column 1 of `input`, passed to the
                     # plotting helpers as `no.info`.
                     no.info <- min(prop.table(table(input[, 1])))
                     errors <- sapply(featsweep, function(x) ifelse(is.null(x), NA, x$error))

                     pdf("svm-error.pdf", width = 5, height = 5)
                     PlotErrors(errors, no.info = no.info)
                     dev.off()

                     pdf("svm-accuracy.pdf", width = 5, height = 5)
                     Plotaccuracy(1 - errors, no.info = no.info)
                     dev.off()
                     which.min(errors)
                     
                     
                     setwd("")

                     # ---- CIBERSORT run and heatmap: GSE87466 ----
                     mRNA <- read.table("GSE87466_exp_mean.txt", header = TRUE, sep = "\t")

                     source("cibersort.R")

                     # perm: number of permutations; QN: quantile normalisation.
                     # (The original comment on this line was mis-encoded; it appears to have
                     # suggested perm = 1000 and QN = TRUE.)
                     result1 <- CIBERSORT("LM22.txt", "GSE87466_exp_mean.txt", perm = 100, QN = TRUE)

                     protein_actfile <- read.table("GSE87466_exp_mean.txt", header = TRUE, row.names = 1)

                     # Group labels are assigned by column position: the first 87 columns are
                     # labelled "UC", the following 21 "Normal".
                     an <- data.frame(
                       sample = colnames(protein_actfile),
                       group = rep(c("UC", "Normal"), times = c(87, 21))
                     )

                     # One-column annotation table (group) with the sample names as row names.
                     anno_color <- an[, 2, drop = FALSE]
                     rownames(anno_color) <- an$sample

                     # Heatmap
                     # NOTE(review): CIBERSORT-Results.txt is presumably written by the
                     # CIBERSORT() call above -- confirm.
                     TME.result <- read.table(
                       "CIBERSORT-Results.txt",
                       sep = "\t", header = TRUE, check.names = FALSE, row.names = 1
                     )
                     re <- TME.result[, -(23:25)]  # drop columns 23 to 25
                     library(pheatmap)

                     # Keep the columns of `re` that are zero in fewer than half of the samples.
                     k <- apply(re, 2, function(x) {
                       sum(x == 0) < nrow(TME.result) / 2
                     })
                     table(k)
                     # Group = str_sub(colnames(exp),1,str_length(colnames(exp))-2)
                     # table(Group)

                     re2 <- as.data.frame(t(re[, k]))
                     pheatmap(
                       re2,
                       scale = "row",
                       show_colnames = FALSE,
                       cluster_cols = FALSE,
                       annotation_col = anno_color,
                       color = colorRampPalette(c("navy", "white", "firebrick3"))(50),
                       file = "ciber_pheatmap.pdf"
                     )
                     
                     
                     
                     # ---- CIBERSORT box plot and stacked bar plot: GSE87466 ----
                     cell <- read.table("CIBERSORT-Results.txt", sep = "\t", header = TRUE,
                                        quote = "", check.names = FALSE)
                     # Keep only the samples whose `P-value` column is <= 0.05.
                     cell <- cell[which(cell$`P-value` <= 0.05), ]
                     # Drop the last three columns.
                     cell <- cell[, -c(ncol(cell), ncol(cell) - 1, ncol(cell) - 2)]

                     # Rename the "Mixture" column to "sam" so that it can serve as the merge key.
                     colnames(cell) <- gsub("Mixture", "sam", colnames(cell))
                     #cell<-cell[,-6]
                     #cell$sam<-gsub("-",".",cell$sam)

                     cluster <- read.table("cluster_boxplot.txt", header = TRUE, row.names = 1,
                                           quote = "", sep = "\t")
                     #cluster1<-cluster[,c(1,2,4)]
                     mybox <- merge(cell, cluster, by = "sam")
                     #mybox <- mybox[,-c(23,25,26)]
                     #mybox <- mybox[,-c(ncol(mybox)-1,ncol(mybox),ncol(mybox)-3)]
                     library(reshape)
                     library(tidyr)
                     library(dplyr)
                     library(ggplot2)
                     library(ggpubr)

                     # Long format: one row per (sample, variable) pair.
                     mybox <- melt(data = mybox, id.vars = c("group", "sam"))

                     cell_boxplot <- ggplot(mybox, aes(x = variable, y = value)) +
                       labs(y = "Cell Proportion", x = NULL, title = "Cell Proportion") +
                       geom_boxplot(aes(fill = group), position = position_dodge(0.5),
                                    width = 0.5, outlier.alpha = 0) +
                       scale_fill_manual(values = c("#4c6c43", "#f1a19a")) +
                       theme_bw() +
                       theme(plot.title = element_text(size = 12, color = "black", hjust = 1),
                             axis.text.x = element_text(angle = 45, hjust = 1),
                             panel.grid = element_blank(),
                             legend.position = "top",
                             legend.text = element_text(size = 12),
                             legend.title = element_text(size = 12)) +
                       stat_compare_means(aes(group = group),
                                          label = "p.signif",
                                          method = "wilcox.test",
                                          hide.ns = TRUE)

                     # A ggplot object is only drawn when it is printed. Auto-printing does not
                     # happen when the script is run through source(), which would leave the
                     # PDF empty, so print explicitly.
                     pdf("cibersort-boxplot.pdf", width = 10, height = 5)
                     print(cell_boxplot)
                     dev.off()

                     dev.new()
                     library(RColorBrewer)
                     mypalette <- colorRampPalette(brewer.pal(8, "Set1"))
                     cell_barplot <- ggplot(mybox, aes(sam, value, fill = variable)) +
                       geom_bar(stat = "identity") +
                       # Label typo fixed ("Estiamted" -> "Estimated").
                       labs(fill = "Celltype", x = "", y = "Estimated Proportion") +
                       theme_bw() +
                       theme(axis.text.x = element_blank(),
                             axis.ticks.x = element_blank(),
                             legend.position = "bottom") +
                       scale_y_continuous(expand = c(0.01, 0)) +
                       scale_fill_manual(values = mypalette(22))
                     print(cell_barplot)
                     
                     
                     # ---- Correlation analysis: GSE87466 ----
                     library("ggstatsplot")
                     library("BiocManager")
                     exp <- read.table("GSE87466_exp_mean.txt", header = TRUE, sep = "\t", row.names = 1)

                     # Pairwise Pearson correlations between the columns of `re`, with p-values.
                     corr.result <- cor(re, method = "pearson")
                     write.table(corr.result, "corr.result.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(re)
                     write.table(corr.p, "corr.p.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     # ggcorrplot() is called through its namespace, like cor_pmat() above: no
                     # library(ggcorrplot) call is visible in this section. print() makes the
                     # plot appear even when the script is run through source().
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )

                     # Same analysis with the CASP1 and CASP4 rows of `exp` added as columns.
                     key.gene <- exp[c("CASP1", "CASP4"), ]
                     # NOTE(review): cbind() pairs rows by position, so the columns of `exp`
                     # must be in the same order as the rows of `re` -- confirm.
                     cor_matrix <- cbind(t(key.gene), re)
                     corr.result <- cor(cor_matrix, method = "pearson")
                     write.table(corr.result, "corr.result_key.gene.txt", row.names = TRUE,
                                 col.names = TRUE, quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(cor_matrix)
                     write.table(corr.p, "corr.p_key.gene.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )
                     
                     
                     
                     setwd("")

                     # ---- CIBERSORT run and heatmap: GSE66407 ----
                     mRNA <- read.table("GSE66407_exp.txt", header = TRUE, sep = "\t")

                     source("cibersort.R")

                     # perm: number of permutations; QN: quantile normalisation.
                     # (The original comment on this line was mis-encoded; it appears to have
                     # suggested perm = 1000 and QN = TRUE.)
                     result1 <- CIBERSORT("LM22.txt", "GSE66407_exp.txt", perm = 100, QN = TRUE)

                     protein_actfile <- read.table("GSE66407_exp.txt", header = TRUE, row.names = 1)

                     # Group labels are assigned by column position: the first 161 columns are
                     # labelled "UC", the following 99 "Normal".
                     an <- data.frame(
                       sample = colnames(protein_actfile),
                       group = rep(c("UC", "Normal"), times = c(161, 99))
                     )

                     # One-column annotation table (group) with the sample names as row names.
                     anno_color <- an[, 2, drop = FALSE]
                     rownames(anno_color) <- an$sample

                     # Heatmap
                     # NOTE(review): CIBERSORT-Results.txt is presumably written by the
                     # CIBERSORT() call above -- confirm.
                     TME.result <- read.table(
                       "CIBERSORT-Results.txt",
                       sep = "\t", header = TRUE, check.names = FALSE, row.names = 1
                     )
                     re <- TME.result[, -(23:25)]  # drop columns 23 to 25
                     library(pheatmap)

                     # Keep the columns of `re` that are zero in fewer than half of the samples.
                     k <- apply(re, 2, function(x) {
                       sum(x == 0) < nrow(TME.result) / 2
                     })
                     table(k)
                     # Group = str_sub(colnames(exp),1,str_length(colnames(exp))-2)
                     # table(Group)

                     re2 <- as.data.frame(t(re[, k]))
                     pheatmap(
                       re2,
                       scale = "row",
                       show_colnames = FALSE,
                       cluster_cols = FALSE,
                       annotation_col = anno_color,
                       color = colorRampPalette(c("navy", "white", "firebrick3"))(50),
                       file = "ciber_pheatmap.pdf"
                     )
                     
                     
                     
                     # ---- CIBERSORT box plot and stacked bar plot: GSE66407 ----
                     cell <- read.table("CIBERSORT-Results.txt", sep = "\t", header = TRUE,
                                        quote = "", check.names = FALSE)
                     # Keep only the samples whose `P-value` column is <= 0.05.
                     cell <- cell[which(cell$`P-value` <= 0.05), ]
                     # Drop the last three columns.
                     cell <- cell[, -c(ncol(cell), ncol(cell) - 1, ncol(cell) - 2)]

                     # Rename the "Mixture" column to "sam" so that it can serve as the merge key.
                     colnames(cell) <- gsub("Mixture", "sam", colnames(cell))
                     #cell<-cell[,-6]
                     #cell$sam<-gsub("-",".",cell$sam)

                     cluster <- read.table("cluster_boxplot.txt", header = TRUE, row.names = 1,
                                           quote = "", sep = "\t")
                     #cluster1<-cluster[,c(1,2,4)]
                     mybox <- merge(cell, cluster, by = "sam")
                     #mybox <- mybox[,-c(23,25,26)]
                     #mybox <- mybox[,-c(ncol(mybox)-1,ncol(mybox),ncol(mybox)-3)]
                     library(reshape)
                     library(tidyr)
                     library(dplyr)

                     # Long format: one row per (sample, variable) pair.
                     mybox <- melt(data = mybox, id.vars = c("group", "sam"))

                     cell_boxplot <- ggplot(mybox, aes(x = variable, y = value)) +
                       labs(y = "Cell Proportion", x = NULL, title = "Cell Proportion") +
                       geom_boxplot(aes(fill = group), position = position_dodge(0.5),
                                    width = 0.5, outlier.alpha = 0) +
                       scale_fill_manual(values = c("#4c6c43", "#f1a19a")) +
                       theme_bw() +
                       theme(plot.title = element_text(size = 12, color = "black", hjust = 1),
                             axis.text.x = element_text(angle = 45, hjust = 1),
                             panel.grid = element_blank(),
                             legend.position = "top",
                             legend.text = element_text(size = 12),
                             legend.title = element_text(size = 12)) +
                       stat_compare_means(aes(group = group),
                                          label = "p.signif",
                                          method = "wilcox.test",
                                          hide.ns = TRUE)

                     # A ggplot object is only drawn when it is printed. Auto-printing does not
                     # happen when the script is run through source(), which would leave the
                     # PDF empty, so print explicitly.
                     pdf("cibersort-boxplot.pdf", width = 10, height = 5)
                     print(cell_boxplot)
                     dev.off()

                     library(RColorBrewer)
                     mypalette <- colorRampPalette(brewer.pal(8, "Set1"))
                     cell_barplot <- ggplot(mybox, aes(sam, value, fill = variable)) +
                       geom_bar(stat = "identity") +
                       # Label typo fixed ("Estiamted" -> "Estimated").
                       labs(fill = "Celltype", x = "", y = "Estimated Proportion") +
                       theme_bw() +
                       theme(axis.text.x = element_blank(),
                             axis.ticks.x = element_blank(),
                             legend.position = "bottom") +
                       scale_y_continuous(expand = c(0.01, 0)) +
                       scale_fill_manual(values = mypalette(22))
                     print(cell_barplot)
                     
                     
                     # ---- Correlation analysis: GSE66407 ----
                     library("ggstatsplot")
                     library("BiocManager")
                     setwd("")
                     exp <- read.table("GSE66407_exp.txt", header = TRUE, sep = "\t", row.names = 1)

                     # Pairwise Pearson correlations between the columns of `re`, with p-values.
                     corr.result <- cor(re, method = "pearson")
                     write.table(corr.result, "corr.result.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(re)
                     write.table(corr.p, "corr.p.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     # ggcorrplot() is called through its namespace, like cor_pmat() above: no
                     # library(ggcorrplot) call is visible in this section. print() makes the
                     # plot appear even when the script is run through source().
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )

                     # Same analysis with the CASP1 and CASP4 rows of `exp` added as columns.
                     key.gene <- exp[c("CASP1", "CASP4"), ]
                     # NOTE(review): cbind() pairs rows by position, so the columns of `exp`
                     # must be in the same order as the rows of `re` -- confirm.
                     cor_matrix <- cbind(t(key.gene), re)
                     corr.result <- cor(cor_matrix, method = "pearson")
                     write.table(corr.result, "corr.result_key.gene.txt", row.names = TRUE,
                                 col.names = TRUE, quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(cor_matrix)
                     write.table(corr.p, "corr.p_key.gene.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )
                     
                     
                     
                     
                     
                     setwd("")

                     # ---- CIBERSORT run and heatmap: GSE75214 ----
                     mRNA <- read.table("GSE75214_exp_mean.txt", header = TRUE, sep = "\t")

                     source("cibersort.R")

                     # perm: number of permutations; QN: quantile normalisation.
                     # (The original comment on this line was mis-encoded; it appears to have
                     # suggested perm = 1000 and QN = TRUE.)
                     result1 <- CIBERSORT("LM22.txt", "GSE75214_exp_mean.txt", perm = 100, QN = TRUE)

                     protein_actfile <- read.table("GSE75214_exp_mean.txt", header = TRUE, row.names = 1)

                     # Group labels are assigned by column position: the first 97 columns are
                     # labelled "UC", the following 22 "Normal".
                     an <- data.frame(
                       sample = colnames(protein_actfile),
                       group = rep(c("UC", "Normal"), times = c(97, 22))
                     )

                     # One-column annotation table (group) with the sample names as row names.
                     anno_color <- an[, 2, drop = FALSE]
                     rownames(anno_color) <- an$sample

                     # Heatmap
                     # NOTE(review): CIBERSORT-Results.txt is presumably written by the
                     # CIBERSORT() call above -- confirm.
                     TME.result <- read.table(
                       "CIBERSORT-Results.txt",
                       sep = "\t", header = TRUE, check.names = FALSE, row.names = 1
                     )
                     re <- TME.result[, -(23:25)]  # drop columns 23 to 25
                     library(pheatmap)

                     # Keep the columns of `re` that are zero in fewer than half of the samples.
                     k <- apply(re, 2, function(x) {
                       sum(x == 0) < nrow(TME.result) / 2
                     })
                     table(k)
                     # Group = str_sub(colnames(exp),1,str_length(colnames(exp))-2)
                     # table(Group)

                     re2 <- as.data.frame(t(re[, k]))
                     pheatmap(
                       re2,
                       scale = "row",
                       show_colnames = FALSE,
                       cluster_cols = FALSE,
                       annotation_col = anno_color,
                       color = colorRampPalette(c("navy", "white", "firebrick3"))(50),
                       file = "ciber_pheatmap.pdf"
                     )
                     
                     
                     
                     # ---- CIBERSORT box plot and stacked bar plot: GSE75214 ----
                     cell <- read.table("CIBERSORT-Results.txt", sep = "\t", header = TRUE,
                                        quote = "", check.names = FALSE)
                     # Keep only the samples whose `P-value` column is <= 0.05.
                     cell <- cell[which(cell$`P-value` <= 0.05), ]
                     # Drop the last three columns.
                     cell <- cell[, -c(ncol(cell), ncol(cell) - 1, ncol(cell) - 2)]

                     # Rename the "Mixture" column to "sam" so that it can serve as the merge key.
                     colnames(cell) <- gsub("Mixture", "sam", colnames(cell))
                     #cell<-cell[,-6]
                     #cell$sam<-gsub("-",".",cell$sam)

                     cluster <- read.table("cluster_boxplot.txt", header = TRUE, row.names = 1,
                                           quote = "", sep = "\t")
                     #cluster1<-cluster[,c(1,2,4)]
                     mybox <- merge(cell, cluster, by = "sam")
                     #mybox <- mybox[,-c(23,25,26)]
                     #mybox <- mybox[,-c(ncol(mybox)-1,ncol(mybox),ncol(mybox)-3)]
                     library(reshape)
                     library(tidyr)
                     library(dplyr)

                     # Long format: one row per (sample, variable) pair.
                     mybox <- melt(data = mybox, id.vars = c("group", "sam"))

                     cell_boxplot <- ggplot(mybox, aes(x = variable, y = value)) +
                       labs(y = "Cell Proportion", x = NULL, title = "Cell Proportion") +
                       geom_boxplot(aes(fill = group), position = position_dodge(0.5),
                                    width = 0.5, outlier.alpha = 0) +
                       scale_fill_manual(values = c("#4c6c43", "#f1a19a")) +
                       theme_bw() +
                       theme(plot.title = element_text(size = 12, color = "black", hjust = 1),
                             axis.text.x = element_text(angle = 45, hjust = 1),
                             panel.grid = element_blank(),
                             legend.position = "top",
                             legend.text = element_text(size = 12),
                             legend.title = element_text(size = 12)) +
                       stat_compare_means(aes(group = group),
                                          label = "p.signif",
                                          method = "wilcox.test",
                                          hide.ns = TRUE)

                     # A ggplot object is only drawn when it is printed. Auto-printing does not
                     # happen when the script is run through source(), which would leave the
                     # PDF empty, so print explicitly.
                     pdf("cibersort-boxplot.pdf", width = 10, height = 5)
                     print(cell_boxplot)
                     dev.off()

                     dev.new()
                     library(RColorBrewer)
                     mypalette <- colorRampPalette(brewer.pal(8, "Set1"))
                     cell_barplot <- ggplot(mybox, aes(sam, value, fill = variable)) +
                       geom_bar(stat = "identity") +
                       # Label typo fixed ("Estiamted" -> "Estimated").
                       labs(fill = "Celltype", x = "", y = "Estimated Proportion") +
                       theme_bw() +
                       theme(axis.text.x = element_blank(),
                             axis.ticks.x = element_blank(),
                             legend.position = "bottom") +
                       scale_y_continuous(expand = c(0.01, 0)) +
                       scale_fill_manual(values = mypalette(22))
                     print(cell_barplot)
                     
                     
                     # ---- Correlation analysis: GSE75214 ----
                     library("ggstatsplot")
                     library("BiocManager")

                     exp <- read.table("GSE75214_exp_mean.txt", header = TRUE, sep = "\t", row.names = 1)

                     # Pairwise Pearson correlations between the columns of `re`, with p-values.
                     corr.result <- cor(re, method = "pearson")
                     write.table(corr.result, "corr.result.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(re)
                     write.table(corr.p, "corr.p.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")

                     # ggcorrplot() is called through its namespace, like cor_pmat() above: no
                     # library(ggcorrplot) call is visible in this section. print() makes the
                     # plot appear even when the script is run through source().
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )

                     # Same analysis with the CASP1 and CASP4 rows of `exp` added as columns.
                     key.gene <- exp[c("CASP1", "CASP4"), ]
                     # NOTE(review): cbind() pairs rows by position, so the columns of `exp`
                     # must be in the same order as the rows of `re` -- confirm.
                     cor_matrix <- cbind(t(key.gene), re)
                     corr.result <- cor(cor_matrix, method = "pearson")
                     write.table(corr.result, "corr.result_key.gene.txt", row.names = TRUE,
                                 col.names = TRUE, quote = FALSE, sep = "\t")

                     corr.p <- ggcorrplot::cor_pmat(cor_matrix)
                     write.table(corr.p, "corr.p_key.gene.txt", row.names = TRUE, col.names = TRUE,
                                 quote = FALSE, sep = "\t")
                     print(
                       ggcorrplot::ggcorrplot(
                         corr = corr.result,
                         type = "full",
                         # method = "circle",
                         lab = TRUE,
                         lab_size = 2
                       )
                     )